/*
 * Original posting that accompanied this header (kept for provenance):
 *
 *   Well here it is!  Many thanks to David Fedor for his help on this.  I
 *   would guess that we each got 90% independently, combining our knowledge
 *   made it 95% and the resulting synergy bumped it up to 99%.  It's up to
 *   you to fill in the remaining 1%...
 *
 *   This header file is being placed in the uiowa submissions directory as
 *   nwtpkg.h.
 *
 *   Cheers,
 *
 *   Matthew
 *
 *   ------8<----Cut Here----8<-----
 */

#ifndef NWT_PACKAGE_FORMAT_H
#define NWT_PACKAGE_FORMAT_H

#include <limits.h>

/* Newton package file format header file.
 *
 * - This file is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * - This file is freely distributable.
 * - The information in this file can be used in other products without
 *   acknowledgement (though acknowledgement would be nice).
 * - If the information in this file is republished its source should be
 *   acknowledged.
 * - If the information is republished with changes, its source should be
 *   acknowledged and my responsibility for the changes disclaimed.
 *
 * This file contains information that was derived (with permission) from a
 * file that is copyright 1994 by David Fedor.  His contribution to the
 * discovery of the information in this file is gratefully acknowledged as
 * is his generosity for allowing it to be passed on.  All errors and
 * omissions are, of course, my own.
 *
 * Apple and Newton are trademarks of Apple Computer, Inc., registered in
 * the United States and other countries.  MessagePad, NewtonScript and
 * Newton ToolKit are trademarks of Apple Computer, Inc.
 *
 * Document history:
 *
 *   v1.0   13 Dec 1994   Matthew Faupel
 *     Initial version.
 *
 *   Later portability revision (NOT by the original author, who bears no
 *   responsibility for these changes):
 *     - "ulong" is chosen via <limits.h> so that it is 4 bytes even on
 *       systems where "unsigned long" is 8 bytes.
 *     - VPUM_AS_VALUE sign-extends from 32 bits instead of relying on the
 *       width of "long" and on shifting a negative value.
 *     - PF_SYMBOL_LENGTH no longer depends on structure tail padding.
 */

/* The format of a package file is as follows:
 *
 *   FixedPkgHdr
 *   PartHdr[partCount]
 *   authorCopyright[]
 *   packageName[]
 *   partDescription[partCount][]
 *   NTKcopyright[]
 *   records[...]
 *
 * Each of these is described below.
 */

/* Types used in the header structure.  The lengths given following the
 * types must be true for this file definition header to be correct.  The
 * checks below refuse to compile on a system where a suitable type cannot
 * be found, rather than silently producing structures of the wrong size.
 *
 * Note: the hex dumps later in this file show multi-byte values stored most
 * significant byte first (e.g. a record length of 22 appears as 00 00 16).
 * On a host with the opposite byte order, ushort/ulong/vpum fields read
 * straight from a package file must be byte-swapped before use.
 *
 * Note: some systems' own headers also declare "ushort" and/or "ulong".  If
 * such a declaration has a different underlying type from the one chosen
 * here, the compiler reports a conflicting typedef.
 */

#if UCHAR_MAX != 0xFF
#error "nwtpkg.h: uchar must be exactly 1 byte of 8 bits"
#endif

#if USHRT_MAX != 0xFFFF
#error "nwtpkg.h: unsigned short must be exactly 2 bytes"
#endif

typedef unsigned char  uchar;     /* 1 byte */
typedef unsigned short unichar;   /* 2 bytes (Unicode character) */
typedef unsigned short ushort;    /* 2 bytes */

/* Prefer "unsigned long" (the original definition) wherever it really is
 * 32 bits wide; otherwise fall back to "unsigned int". */
#if ULONG_MAX == 0xFFFFFFFFUL
typedef unsigned long  ulong;     /* 4 bytes */
#elif UINT_MAX == 0xFFFFFFFFUL
typedef unsigned int   ulong;     /* 4 bytes */
#else
#error "nwtpkg.h: no 4-byte unsigned integer type found for ulong"
#endif

/* Characters in the package signature */

#define PF_PACKAGE_SIG          "package0"
#define PF_PACKAGE_SIG_LENGTH   (sizeof(PF_PACKAGE_SIG)-1)

/* Flags used in the package header:
 * Package will not be compressed on the newt */
#define PF_UNCOMPRESSED         0x10000000
/* Can't copy this package */
#define PF_COPYPROTECTED        0x40000000
/* Set on a dispatch-only auto part */
#define PF_DISPATCHONLY         0x80000000

struct _FixedPkgHdr
{
    /* Signature to indicate that this is a package file (the characters
     * 'package0'; note that it is not NUL terminated). */
    char    packageSig[PF_PACKAGE_SIG_LENGTH];  /* PF_PACKAGE_SIG */

    /* Four more readable characters; usage unknown.  Characters that have
     * been seen are: 'xxxx', ' no ' and 'none'. */
    char    something[4];

    /* Combination of the PF_ flags defined above. */
    ulong   packageFlags;

    /* Version number of the package as supplied by the developer.  Note
     * that this is a whole integer. */
    ulong   versionNo;

    ushort  zeroPad1;

    /* Length of the author+copyright string supplied in the subsequent
     * strings section of the header.  Repeated for some reason. */
    ushort  authorCopyrightLength;
    ushort  authorCopyrightLength2;

    /* Length of the package name string supplied in the subsequent strings
     * section of the header. */
    ushort  packageNameLength;

    /* Length of the entire package file. */
    ulong   packageLength;

    /* File creation and modification timestamps (Apple format, i.e. seconds
     * since 00:00 1 Jan 1904, local time).  The two values are normally
     * identical, though the modified time has been 0 in some cases. */
    ulong   createdTimeStamp;
    ulong   modifiedTimeStamp;

    ulong   zeroPad2;

    /* Offset in the file of the first non-header record. */
    ulong   firstRecordOffset;

    /* Number of parts in this package. */
    ulong   partCount;
};

typedef struct _FixedPkgHdr FixedPkgHdr;

/* The fixed package header is followed immediately by an array of PartHdr
 * items.  The number of items is given by the partCount field of the
 * header. */

struct _PartHdr
{
    /* Offset of start of part data (from firstRecordOffset in header). */
    ulong   partOffset;

    /* Length of the data in the part (repeated for some reason) */
    ulong   lengthOfData;
    ulong   lengthOfData2;

    /* 'form', 'book', 'auto' etc.  All values I have seen have only been
     * four characters, so this field might only be 4 characters.  However,
     * the following four characters are always zero and I've seen a
     * technical paper which says that type 'auto!' is a valid type, hence I
     * reckon the field is in fact 8 characters long. */
    uchar   partType[8];

    /* Unknown use.  Always 0x00000081. */
    ulong   value00000081;

    /* Offset (from beginning of strings block) and length of the string
     * describing this part.  This is often just the part type repeated
     * (e.g. "form"), but can be anything. */
    ushort  partStringOffset;
    ushort  partStringLength;

    ulong   zeroPad1;
};

typedef struct _PartHdr PartHdr;

/* Following the part data is a block of variable length strings.  These
 * are: */

/* Copyright string from NTK project window.  NULNUL terminated.  Length
 * given by authorCopyrightLength in the header.
 *
 * unichar authorCopyright[]; */

/* Name of package as seen in "Remove Software".  NULNUL terminated.  Length
 * given by packageNameLength in the header.
 *
 * unichar packageName[]; */

/* Part description (as many of these as there are parts).  Not necessarily
 * NUL terminated.  Length(s) given in the appropriate part header(s).
 *
 * char partString[]; */

/* Copyright string inserted by NTK.  This is padded with trailing bytes
 * containing 0xFF such that the next data (which is the first record of the
 * package proper and is pointed to by firstRecordOffset) is on a four byte
 * boundary relative to the start of the file.
 *
 * char NTKcopyright[]; */

/* Before going into the details of the three types of data, a quick
 * introduction to vpums (a name coined by David Fedor).  "vpum" is the name
 * given to the 32 bit values used by the Newton; it's an acronym derived
 * from Value, Pointer, Unusual and Magic-pointer, that being the four types
 * of data that the value can represent.
 *
 * The type information is encoded in the lowest two bits of the 32 bit
 * value and the upper 30 bits contain the value information.  The types,
 * what they represent and how they are interpreted are as follows:
 *
 * Low bits  Type     Interpretation
 * ------------------------------------------------------------------------
 *    00     Value    An immediate signed 30-bit integer value.  The integer
 *                    is derived by doing a signed right shift by 2, e.g.
 *                    0x00000100 = integer 64 (decimal),
 *                    0xFFFFFFFC = integer -1 (decimal).
 *
 *    01     Pointer  In memory, a pointer to another NewtonScript record, in
 *                    the package file an absolute offset to another record.
 *                    The exact address/offset is obtained by masking out the
 *                    low two bits of the 32-bit value, e.g. 0x00000105 means
 *                    offset 0x0104 in the package.
 *
 *    10     Unusual  Used to encode a variety of immediate values.  The ones
 *                    that I have come across are:
 *                      0x00000002 - Nil
 *                      0x0000001A - True
 *                      0x000UUUU6 - Unicode character \uUUUU, e.g. 0x00000416
 *                                   is the NewtonScript constant $A
 *                      0x00055552 - Symbol class (explained below)
 *
 *    11     Magic    The value encodes a NewtonScript magic pointer, the
 *                    values represented in NewtonScript as @nnnn and used to
 *                    refer to built-in objects in NewtonOS.  The magic
 *                    pointer number is derived from the 32-bit value by
 *                    dividing by 4, e.g. 0x00000007 is @1.
 */

typedef ulong vpum;

#define VPUM_TYPE(v)        ((v)&0x03)
#define VPUM_TYPE_VALUE     0
#define VPUM_TYPE_POINTER   1
#define VPUM_TYPE_UNUSUAL   2
#define VPUM_TYPE_MAGIC     3

/* Decode an immediate integer vpum to a signed long.
 *
 * The upper 30 bits are extracted as an unsigned quantity and the sign is
 * then applied by hand from bit 31.  This gives the documented results
 * (0x00000100 -> 64, 0xFFFFFFFC -> -1) whatever the width of "long", and
 * avoids right-shifting a negative value.  Note that (v) is evaluated
 * twice, so it must not have side effects. */
#define VPUM_AS_VALUE(v) \
    ((long) (((v) & 0xFFFFFFFCUL) >> 2) - \
     ((((v) & 0x80000000UL) != 0) ? 0x40000000L : 0L))

#define VPUM_AS_POINTER(v)  ((v) & 0xFFFFFFFC)
#define VPUM_AS_UNICODE(v)  ((unichar) ((v) >> 4))
#define VPUM_AS_MAGIC(v)    ((v) >> 2)

#define VPUM_NIL            0x00000002
#define VPUM_TRUE           0x0000001A
#define VPUM_SYMBOL_CLASS   0x00055552

/* Back to the records...  following the header data are the records for the
 * package.  These are always aligned on an 8-byte boundary relative to
 * firstRecordOffset, which itself is aligned on a 4-byte boundary relative
 * to the start of the file.  The "gaps" between the records are filled with
 * rubbish, quite often (but not always) this is 0xBABA... or 0xBADBADBA...
 *
 * Data records have the following general format:
 */

#define PF_RECORD_TYPE_DATA     0x40
#define PF_RECORD_TYPE_ARRAY    0x41
#define PF_RECORD_TYPE_FRAME    0x43

struct _PkgRecord
{
    /* Note that only 3 bytes are used to hold the length of the record. */
    uchar   recordLength[3];

    /* This byte indicates what the record represents, one of raw data, a
     * NewtonScript array or a NewtonScript frame.  The three values used
     * are given above. */
    uchar   recordType;

    /* These flags are always zero in the package file, though I believe
     * that the in-memory representation of this data is similar to the
     * in-file representation and when in-memory this flags field is used. */
    ulong   recordFlags;

    /* This gives more detailed information about the record.  The exact
     * meaning of the field depends on the recordType.  See below. */
    vpum    recordClass;

    /* data follows here if record has data... see below. */
};

typedef struct _PkgRecord PkgRecord;

/* Total length in bytes of record (r), assembled from the three length
 * bytes (most significant first).  (r) is a record structure, not a
 * pointer to one. */
#define PF_RECORD_LENGTH(r) ((((ulong) (r).recordLength[0]) << 16) + \
                             (((ulong) (r).recordLength[1]) << 8) + \
                             (r).recordLength[2])

/* Data records.  (recordType = 0x40).
 *
 * Data records are used to represent any NewtonScript data that is not an
 * array or frame.  Examples of this type of data are Unicode strings,
 * icons, symbols and so on.
 *
 * For data records, the recordClass field contains a vpum that indicates
 * the class of the data.  Usually this is an offset to another data record
 * that gives the symbol naming the data type.  For instance, for unicode
 * strings, the recordClass field will be an offset to a symbol record
 * defining the symbol "String".  Immediately following the recordClass
 * field comes the byte data for the record.  As an example, a String record
 * might look like this:
 *
 *   00 00 16 40   Length = 22, type = data
 *   00 00 00 00   Flags = 0 (as always)
 *   00 00 02 85   Offset of String symbol record (see below)
 *   00 46 00 72   Unicode string "Fred" including terminating NULNUL.
 *   00 65 00 64
 *   00 00
 *
 * Symbol records are a special type of data record.  Their recordClass is
 * always the "unusual" type vpum 0x00055552 and the data is a four byte
 * hash value, followed by the (ASCII not Unicode) string giving the symbol
 * name.  The string is NUL terminated.  For instance, the following is the
 * symbol record for the symbol "String":
 *
 *   00 00 17 40   Length = 23, type = data
 *   00 00 00 00   Flags = 0
 *   00 05 55 52   VPUM_SYMBOL_CLASS
 *   18 10 F3 5F   Hash value
 *   53 74 72 69   "String"
 *   6E 67 00
 *
 * The hash value for symbols is calculated by the formula:
 *
 *   (Sum of toupper( c ) ) * 0x9e3779b9
 *
 * for all characters "c" in the symbol.
 */

struct _DataRecord
{
    uchar   recordLength[3];
    uchar   recordType;         /* 0x40 */
    ulong   recordFlags;        /* 0 */
    vpum    recordClass;        /* Offset to symbol record */
    uchar   recordData[1];
};

typedef struct _DataRecord DataRecord;

/* Number of data bytes that follow the common record header. */
#define PF_DATA_SIZE(r)     (PF_RECORD_LENGTH(r)-sizeof(PkgRecord))

struct _SymbolRecord
{
    uchar   recordLength[3];
    uchar   recordType;         /* 0x40 */
    ulong   recordFlags;        /* 0 */
    vpum    recordClass;        /* VPUM_SYMBOL_CLASS */
    ulong   symbolHashValue;
    uchar   symbolName[1];
};

typedef struct _SymbolRecord SymbolRecord;

/* Number of bytes of symbol name stored in the record, including the
 * terminating NUL (7 for the "String" example above: 23 - 12 - 4).
 *
 * This is computed from the sizes of the fields that precede symbolName
 * rather than from sizeof(SymbolRecord): compilers normally pad the end of
 * SymbolRecord after the one-byte symbolName member, which would make the
 * result too small. */
#define PF_SYMBOL_LENGTH(r) \
    (PF_RECORD_LENGTH(r)-sizeof(PkgRecord)-sizeof(ulong))

/* Array records.  (recordType = 0x41).
 *
 * Array records are used to represent NewtonScript arrays, both untyped
 * ( [ x, y, z ] ) and typed ( [type: x, y, z ] ).  They are also used to
 * represent certain internal arrays.
 *
 * Similar to data records, the recordClass field of an array record
 * normally points to a symbol record indicating the type of the data.  So,
 * for instance, for an array of stepChildren the recordClass field points
 * to a symbol record defining the symbol "stepChildren".  For untyped
 * NewtonScript arrays, the recordClass field points to a symbol record for
 * the symbol "Array".
 *
 * The contents of the array are represented by a series of vpums following
 * the recordClass.  Immediate values are represented by immediate value
 * vpums; array elements that are themselves complex types are represented
 * by vpum offsets to other records.
 *
 * Certain array records representing internal housekeeping arrays have
 * recordClass values that are not offsets to symbol records.  The two that
 * I have come across are the value VPUM_NIL, which is used for the very
 * first record in any part data, and a vpum integer value used for frame
 * maps, which are explained in more detail below.
 *
 * A quick digression into the format of the part data.  As far as I can
 * tell, the first record read is always an array record with recordClass
 * VPUM_NIL, as mentioned above.  This contains one element which is an
 * offset to a frame record.  This frame record then contains all the
 * information about the part, be it book, form or whatever.
 *
 * An example of the encoding of a NewtonScript array (courtesy of David
 * Fedor):
 *
 *   NewtonScript array:  [6, nil, @180]
 *   Equivalent record:
 *
 *     00 00 18 41   Length = 24, type = array
 *     00 00 00 00   Flags = 0
 *     00 00 07 09   vpum offset to symbol record for "Array"
 *     00 00 00 18   vpum 0x18 is immediate value 6
 *     00 00 00 02   vpum 0x2 is Nil
 *     00 00 02 D3   vpum 0x2d3 is magic ptr @180
 */

struct _ArrayRecord
{
    uchar   recordLength[3];
    uchar   recordType;         /* 0x41 */
    ulong   recordFlags;        /* 0 */
    vpum    recordClass;
    vpum    arrayElement[1];
};

typedef struct _ArrayRecord ArrayRecord;

/* Number of vpum elements that follow the common record header. */
#define PF_ARRAY_SIZE(r) ((PF_RECORD_LENGTH(r)-sizeof(PkgRecord))/sizeof(vpum))

/* Frame records.  (recordType = 0x43).
 *
 * Frame records are (surprise, surprise :-) used to represent NewtonScript
 * frames.  For frame records, the recordClass field is always an offset to
 * a frame map array (explained shortly).  The contents of the slots of the
 * frame are then represented by a block of vpums following the recordClass
 * (in a similar fashion to array elements for array records).  Which value
 * belongs to which slot is determined by the frame map - the values are in
 * the same order as the slot names in the map.
 *
 * Speaking of which:  The frame map is an array record containing vpum
 * offsets to the symbol records that define the names of the slots in the
 * frame, so the full representation of a frame is something like:
 *
 *   FrameRecord            /--> ArrayRecord
 *     recordLength;        |      recordLength;
 *     recordType = 0x43    |      recordType = 0x41
 *     recordFlags = 0      |      recordFlags = 0
 *     recordClass   -------/      recordClass = see (1) below
 *                                 NIL (but see (2) below)
 *                                 ------------> SymbolRecord for name of slot 0
 *     ...                         ------------> SymbolRecord for name of slot 1
 *     ...
 *
 * (1) The recordClass of a frame map record is a combination of flags
 *     giving information about the contents of the frame map.  The flags
 *     are:
 *       0x00000004   Frame map is sorted by symbol hash value
 *                    Frames containing 21 or more slots are sorted by
 *                    slot name hash value (for lookup speed), with the
 *                    exception of CodeBlock _argFrame frames (which rely
 *                    on ordering to match up with the NewtonScript
 *                    bytecode).  This flag is set when this has been
 *                    done.
 *       0x00000008   I don't know - please tell me if you figure it out!
 *                    It seems to have something to do with whether the
 *                    frame is a normal frame or part of a proto or other
 *                    frame not used directly, but that's not it
 *                    exactly...
 *       0x00000010   Frame map contains a _proto slot.
 *
 * (2) In the majority of cases, the first element of the frame map is NIL,
 *     however occasionally it is a vpum offset to another frame map array.
 *     If this is the case, then the elements of the pointed to array are
 *     counted *before* those of the original map when working out which
 *     symbol to use for a slot name.  e.g. if we have the following:
 *
 *       Frame           /-> Array               /-> Array
 *         recordClass  -/     recordClass = 0   |     recordClass = 0
 *                             ------------------/     NIL
 *                             --> 'Third              --> 'First
 *                             --> 'Fourth             --> 'Second
 *
 *     Then slot 0 has the name "First", slot 1 "Second", slot 2 "Third",
 *     and slot 3 "Fourth".
 */

struct _FrameRecord
{
    uchar   recordLength[3];
    uchar   recordType;         /* 0x43 */
    ulong   recordFlags;        /* 0 */
    vpum    recordClass;        /* Always an offset to a frame map */
    vpum    frameSlotValue[1];
};

typedef struct _FrameRecord FrameRecord;

/* Number of slot value vpums that follow the common record header. */
#define PF_FRAME_SIZE(r) ((PF_RECORD_LENGTH(r)-sizeof(PkgRecord))/sizeof(vpum))

#define PF_FM_SORTED_BY_HASH    0x00000004
#define PF_FM_USAGE_UNKNOWN     0x00000008
#define PF_FM_HAS_PROTO_SLOT    0x00000010

struct _FrameMap
{
    uchar   recordLength[3];
    uchar   recordType;         /* 0x41 */
    ulong   recordFlags;        /* 0 */
    vpum    recordClass;        /* Combination of PF_FM_... above */
    vpum    nextMap;            /* Offset of next map or VPUM_NIL */
    vpum    frameSlotName[1];   /* Offset of slot name symbol record */
};

typedef struct _FrameMap FrameMap;

/* Number of slot name entries in the map; the leading nextMap element is
 * not counted (hence the -1). */
#define PF_FRAME_MAP_SIZE(r) \
    ((PF_RECORD_LENGTH(r)-sizeof(PkgRecord))/sizeof(vpum)-1)

/* That's it!  The parts that I'm unsure of and would appreciate information
 * about are:
 *
 * - Any of the fields called "zeroPad".
 * - The value00000081 field in the part data header.
 * - The meaning of the PF_FM_USAGE_UNKNOWN flag in frame maps.
 *
 * Plus of course any omissions and/or errors that I'm unaware of (I've not
 * seen a multi-part package first hand for instance).
 *
 *
 * Matthew Faupel
 * 13 Dec 1994
 */

#endif